{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "对backtrader进行改善优化：\n",
    "\n",
    "一：继承创建手续费stampDutyCommissionScheme类，根据中国股市设置佣金最低额度5元，已经印花税卖出单边收取模式。\n",
    "\n",
    "二：创建策略类Strategy_LJT：新策略可继承使用\n",
    "1.策略中增加回测订单信息输出。\n",
    "2.策略中增加成交结果信息输出。\n",
    "\n",
    "三：创建整体运行函数\n",
    "backtrader_run(code = \"600000\",\n",
    "                   start=\"2020-01-01\",\n",
    "                   end = \"2020-11-01\",\n",
    "                   strategy = Strategy_LJT,\n",
    "                   data = None,\n",
    "                   cash = 10000,\n",
    "                   commission = 0.0002,\n",
    "                   stamp_duty = 0.001,\n",
    "                   )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import backtrader_LJT as btl\n",
    "import backtrader as bt\n",
    "%matplotlib inline\n",
    "class my_strategy(btl.Strategy_LJT):\n",
    "   # 全局设定交易策略的参数\n",
    "    params = (\n",
    "        ('maperiod', 10),\n",
    "    )\n",
    "\n",
    "    def __init__(self):\n",
    "        # 指定价格序列\n",
    "        self.dataclose = self.datas[0].close\n",
    "        self.datavolume = self.datas[0].volume\n",
    "        # 初始化交易指令、买卖价格和手续费\n",
    "        self.order = None\n",
    "        self.buyprice = None\n",
    "        self.buycomm = None\n",
    "\n",
    "        # 添加移动均线指标，内置了talib模块\n",
    "        self.sma = bt.indicators.SimpleMovingAverage(\n",
    "            self.dataclose, period=self.params.maperiod)        \n",
    "        \n",
    "        self.sma_5 = bt.indicators.SimpleMovingAverage(\n",
    "            self.dataclose, period= 5)      \n",
    "        self.sma_20 = bt.indicators.SimpleMovingAverage(\n",
    "            self.dataclose, period= 20)      \n",
    "        self.sma_120 = bt.indicators.SimpleMovingAverage(\n",
    "            self.dataclose, period= 120)      \n",
    "        # 添加移动均线指标，内置了talib模块\n",
    "        self.volume_sma_5 = bt.indicators.SimpleMovingAverage(\n",
    "            self.datavolume, period= 5)\n",
    "        self.volume_sma_20 = bt.indicators.SimpleMovingAverage(\n",
    "            self.datavolume, period= 20)\n",
    "        self.volume_sma_120 = bt.indicators.SimpleMovingAverage(\n",
    "            self.datavolume, period= 120)\n",
    "\n",
    "    def next(self):\n",
    "        if self.order:  # 检查是否有指令等待执行,\n",
    "            print(self.order)\n",
    "            return\n",
    "        # 检查是否持仓\n",
    "        if not self.position:  # 没有持仓\n",
    "            # 执行买入条件判断：价格小于（20/120）均线，大于5日均线，且量小于均线（20/120）\n",
    "            if self.dataclose[0] < self.sma_20[0] and self.dataclose[0] < self.sma_120[0] and self.dataclose[0] > self.sma_5[0]\\\n",
    "                and self.data_volume[0] < self.volume_sma_20[0] and self.data_volume[0] < self.volume_sma_120[0]:\n",
    "                # 执行买入\n",
    "                self.order = self.buy(size=10000)\n",
    "        else:\n",
    "            # 执行卖出条件判断：价格高于（20/5）均线，小于5日均线，且量小于均线（20/5）\n",
    "            if self.dataclose[0] > self.sma_20[0]  and self.dataclose[0] < self.sma_5[0]\\\n",
    "                and self.data_volume[0] > self.volume_sma_20[0] and self.data_volume[0] > self.volume_sma_5[0]:                # 执行卖出\n",
    "                self.order = self.sell(size=10000)\n",
    "   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "btl.backtrader_run(code=\"159928\",\n",
    "                   strategy=my_strategy,\n",
    "                   start=\"2014-6-1\",\n",
    "                   end='2020-12-30',\n",
    "                   cash=30000,\n",
    "                   commission=0.0002,\n",
    "#                    trans_rate=0.00002,\n",
    "                   stamp_duty=0.001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tushare as ts\n",
    "ts.get_k_data('600000', autype='qfq', start='2017-01-01', end='') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>news_id</th>\n",
       "      <th>news_time</th>\n",
       "      <th>event_code</th>\n",
       "      <th>event_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6827815</td>\n",
       "      <td>2017-01-01 00:04:00</td>\n",
       "      <td>DB002062</td>\n",
       "      <td>减税政策</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6827836</td>\n",
       "      <td>2017-01-01 00:04:00</td>\n",
       "      <td>AD002002009</td>\n",
       "      <td>减免借款</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6828335</td>\n",
       "      <td>2017-01-01 00:08:20</td>\n",
       "      <td>AA005001</td>\n",
       "      <td>裁员</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6828269</td>\n",
       "      <td>2017-01-01 00:12:00</td>\n",
       "      <td>DB001036</td>\n",
       "      <td>政策实施</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6827841</td>\n",
       "      <td>2017-01-01 00:12:00</td>\n",
       "      <td>DB001036</td>\n",
       "      <td>政策实施</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2454853</th>\n",
       "      <td>21838404</td>\n",
       "      <td>2020-06-30 23:57:04</td>\n",
       "      <td>CD008001</td>\n",
       "      <td>利润减少</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2454854</th>\n",
       "      <td>21838404</td>\n",
       "      <td>2020-06-30 23:57:04</td>\n",
       "      <td>GF012</td>\n",
       "      <td>重大合同</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2454855</th>\n",
       "      <td>21838406</td>\n",
       "      <td>2020-06-30 23:57:04</td>\n",
       "      <td>CD008016</td>\n",
       "      <td>业绩亏损</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2454856</th>\n",
       "      <td>21838458</td>\n",
       "      <td>2020-06-30 23:57:47</td>\n",
       "      <td>FA037</td>\n",
       "      <td>市值增加</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2454857</th>\n",
       "      <td>21838461</td>\n",
       "      <td>2020-06-30 23:57:48</td>\n",
       "      <td>CD009006</td>\n",
       "      <td>非标准审计报告</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2454858 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          news_id            news_time   event_code event_name\n",
       "0         6827815  2017-01-01 00:04:00     DB002062       减税政策\n",
       "1         6827836  2017-01-01 00:04:00  AD002002009       减免借款\n",
       "2         6828335  2017-01-01 00:08:20     AA005001         裁员\n",
       "3         6828269  2017-01-01 00:12:00     DB001036       政策实施\n",
       "4         6827841  2017-01-01 00:12:00     DB001036       政策实施\n",
       "...           ...                  ...          ...        ...\n",
       "2454853  21838404  2020-06-30 23:57:04     CD008001       利润减少\n",
       "2454854  21838404  2020-06-30 23:57:04        GF012       重大合同\n",
       "2454855  21838406  2020-06-30 23:57:04     CD008016       业绩亏损\n",
       "2454856  21838458  2020-06-30 23:57:47        FA037       市值增加\n",
       "2454857  21838461  2020-06-30 23:57:48     CD009006    非标准审计报告\n",
       "\n",
       "[2454858 rows x 4 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "event_label = pd.read_csv(\"..\\chinascope_news_data\\event_label.csv\")\n",
    "event_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>news_id</th>\n",
       "      <th>news_time</th>\n",
       "      <th>industry_code</th>\n",
       "      <th>industry_name</th>\n",
       "      <th>relevance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6827783</td>\n",
       "      <td>2017-01-01 00:01:00</td>\n",
       "      <td>CP003</td>\n",
       "      <td>电脑与外围设备</td>\n",
       "      <td>0.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6827783</td>\n",
       "      <td>2017-01-01 00:01:00</td>\n",
       "      <td>IT003</td>\n",
       "      <td>信息技术服务</td>\n",
       "      <td>0.44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6827773</td>\n",
       "      <td>2017-01-01 00:02:00</td>\n",
       "      <td>CP003</td>\n",
       "      <td>电脑与外围设备</td>\n",
       "      <td>0.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6827760</td>\n",
       "      <td>2017-01-01 00:02:00</td>\n",
       "      <td>UT005</td>\n",
       "      <td>独立电力生产商与能源贸易商</td>\n",
       "      <td>0.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6827760</td>\n",
       "      <td>2017-01-01 00:02:00</td>\n",
       "      <td>PS008</td>\n",
       "      <td>调查和咨询服务</td>\n",
       "      <td>0.33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5750690</th>\n",
       "      <td>21838451</td>\n",
       "      <td>2020-06-30 23:57:46</td>\n",
       "      <td>ME001</td>\n",
       "      <td>广告</td>\n",
       "      <td>0.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5750691</th>\n",
       "      <td>21838453</td>\n",
       "      <td>2020-06-30 23:57:47</td>\n",
       "      <td>SS001</td>\n",
       "      <td>互联网软件与服务</td>\n",
       "      <td>0.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5750692</th>\n",
       "      <td>21838454</td>\n",
       "      <td>2020-06-30 23:57:47</td>\n",
       "      <td>IT003</td>\n",
       "      <td>信息技术服务</td>\n",
       "      <td>0.34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5750693</th>\n",
       "      <td>21838454</td>\n",
       "      <td>2020-06-30 23:57:47</td>\n",
       "      <td>CP003</td>\n",
       "      <td>电脑与外围设备</td>\n",
       "      <td>0.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5750694</th>\n",
       "      <td>21838461</td>\n",
       "      <td>2020-06-30 23:57:48</td>\n",
       "      <td>RE005</td>\n",
       "      <td>房地产开发与经营</td>\n",
       "      <td>0.16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5750695 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          news_id            news_time industry_code  industry_name  relevance\n",
       "0         6827783  2017-01-01 00:01:00         CP003        电脑与外围设备       0.19\n",
       "1         6827783  2017-01-01 00:01:00         IT003         信息技术服务       0.44\n",
       "2         6827773  2017-01-01 00:02:00         CP003        电脑与外围设备       0.17\n",
       "3         6827760  2017-01-01 00:02:00         UT005  独立电力生产商与能源贸易商       0.45\n",
       "4         6827760  2017-01-01 00:02:00         PS008        调查和咨询服务       0.33\n",
       "...           ...                  ...           ...            ...        ...\n",
       "5750690  21838451  2020-06-30 23:57:46         ME001             广告       0.25\n",
       "5750691  21838453  2020-06-30 23:57:47         SS001       互联网软件与服务       0.50\n",
       "5750692  21838454  2020-06-30 23:57:47         IT003         信息技术服务       0.34\n",
       "5750693  21838454  2020-06-30 23:57:47         CP003        电脑与外围设备       0.45\n",
       "5750694  21838461  2020-06-30 23:57:48         RE005       房地产开发与经营       0.16\n",
       "\n",
       "[5750695 rows x 5 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "industry_label = pd.read_csv(\"..\\chinascope_news_data\\industry_label.csv\")\n",
    "industry_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "ename": "MemoryError",
     "evalue": "Unable to allocate 61.8 MiB for an array with shape (8094590,) and data type float64",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mMemoryError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-4-000628f4890b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcompany_label\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"..\\chinascope_news_data\\company_label.csv\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mcompany_label\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[0;32m    674\u001b[0m         )\n\u001b[0;32m    675\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 676\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    677\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    678\u001b[0m     \u001b[0mparser_f\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m    452\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    453\u001b[0m     \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 454\u001b[1;33m         \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    455\u001b[0m     \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    456\u001b[0m         \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mread\u001b[1;34m(self, nrows)\u001b[0m\n\u001b[0;32m   1131\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1132\u001b[0m         \u001b[0mnrows\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_validate_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"nrows\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1133\u001b[1;33m         \u001b[0mret\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1134\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1135\u001b[0m         \u001b[1;31m# May alter columns / col_dict\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mread\u001b[1;34m(self, nrows)\u001b[0m\n\u001b[0;32m   2035\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2036\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2037\u001b[1;33m             \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2038\u001b[0m         \u001b[1;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2039\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_first_chunk\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.read\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_low_memory\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers._concatenate_chunks\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32m<__array_function__ internals>\u001b[0m in \u001b[0;36mconcatenate\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
      "\u001b[1;31mMemoryError\u001b[0m: Unable to allocate 61.8 MiB for an array with shape (8094590,) and data type float64"
     ]
    }
   ],
   "source": [
    "company_label = pd.read_csv(\"..\\chinascope_news_data\\company_label.csv\")\n",
    "company_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] File ..\\chinascope_news_data\news_info.csv does not exist: '..\\\\chinascope_news_data\\news_info.csv'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-5-3464c277d63d>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnews_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"..\\chinascope_news_data\\news_info.csv\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mnews_info\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[0;32m    674\u001b[0m         )\n\u001b[0;32m    675\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 676\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    677\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    678\u001b[0m     \u001b[0mparser_f\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m    446\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    447\u001b[0m     \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 448\u001b[1;33m     \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfp_or_buf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    449\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    450\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m    878\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    879\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 880\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    881\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    882\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m   1112\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"c\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1113\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"c\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1114\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1115\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1116\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"python\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m   1889\u001b[0m         \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"usecols\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1890\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1891\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1892\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munnamed_cols\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munnamed_cols\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1893\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] File ..\\chinascope_news_data\news_info.csv does not exist: '..\\\\chinascope_news_data\\news_info.csv'"
     ]
    }
   ],
   "source": [
    "news_info = pd.read_csv(\"..\\chinascope_news_data\\news_info.csv\")\n",
    "news_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import pymysql"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "file=open(\"..\\chinascope_news_data\\company_label.csv\",'r',encoding='gbk')\n",
    "reder = csv.reader(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "ename": "OperationalError",
     "evalue": "(2003, \"Can't connect to MySQL server on 'localhost' ([WinError 10061] 由于目标计算机积极拒绝，无法连接。)\")",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mConnectionRefusedError\u001b[0m                    Traceback (most recent call last)",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pymysql\\connections.py\u001b[0m in \u001b[0;36mconnect\u001b[1;34m(self, sock)\u001b[0m\n\u001b[0;32m    568\u001b[0m                         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 569\u001b[1;33m                             sock = socket.create_connection(\n\u001b[0m\u001b[0;32m    570\u001b[0m                                 \u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhost\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mport\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect_timeout\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\socket.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address)\u001b[0m\n\u001b[0;32m    807\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 808\u001b[1;33m             \u001b[1;32mraise\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    809\u001b[0m         \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\socket.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address)\u001b[0m\n\u001b[0;32m    795\u001b[0m                 \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource_address\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 796\u001b[1;33m             \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msa\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    797\u001b[0m             \u001b[1;31m# Break explicitly a reference cycle\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mConnectionRefusedError\u001b[0m: [WinError 10061] 由于目标计算机积极拒绝，无法连接。",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mOperationalError\u001b[0m                          Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-3-e7355a1d4683>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# 打开数据库连接\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mdb\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpymysql\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"localhost\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"root\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"111111\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[1;31m# 使用 cursor() 方法创建一个游标对象 cursor\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[0mdb\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'database dbname1'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pymysql\\__init__.py\u001b[0m in \u001b[0;36mConnect\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m     92\u001b[0m     \"\"\"\n\u001b[0;32m     93\u001b[0m     \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mconnections\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mConnection\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 94\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mConnection\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     95\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     96\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mconnections\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0m_orig_conn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pymysql\\connections.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, host, user, password, database, port, unix_socket, charset, sql_mode, read_default_file, conv, use_unicode, client_flag, cursorclass, init_command, connect_timeout, ssl, read_default_group, compress, named_pipe, autocommit, db, passwd, local_infile, max_allowed_packet, defer_connect, auth_plugin_map, read_timeout, write_timeout, bind_address, binary_prefix, program_name, server_public_key)\u001b[0m\n\u001b[0;32m    325\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sock\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    326\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 327\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    328\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    329\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_create_ssl_ctx\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msslp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pymysql\\connections.py\u001b[0m in \u001b[0;36mconnect\u001b[1;34m(self, sock)\u001b[0m\n\u001b[0;32m    617\u001b[0m                 \u001b[0mexc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtraceback\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat_exc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    618\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[0mDEBUG\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 619\u001b[1;33m                 \u001b[1;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    620\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    621\u001b[0m             \u001b[1;31m# If e is neither DatabaseError or IOError, It's a bug.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mOperationalError\u001b[0m: (2003, \"Can't connect to MySQL server on 'localhost' ([WinError 10061] 由于目标计算机积极拒绝，无法连接。)\")"
     ]
    }
   ],
   "source": [
    "# 打开数据库连接\n",
    "db = pymysql.connect(\"localhost\",\"root\",\"111111\")\n",
    "# 使用 cursor() 方法创建一个游标对象 cursor\n",
    "db.cursor().execute('database dbname1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlalchemy\n",
    "from flask_sqlalchemy import SQLAlchemy\n",
    "from flask import Flask\n",
    "import pymysql\n",
    "# from sqlalchemy import Datetime\n",
    "\n",
    "app = Flask(__name__)\n",
    "\n",
    "app.config[\"SQLALCHEMY_DATABASE_URI\"] = \"sqlite:///\" + \"../chinascope_news_data/news.db\"\n",
    "app.config[\"SQLALCHEMY_TRACK_MODIFICATIONS\"] = False\n",
    "app.config[\"SECRET_KEY\"] = '123456'\n",
    "db = SQLAlchemy(app)################################################\n",
    "\n",
    "class Company_label(db.Model):\n",
    "    __tablename__ = \"company_label\"\n",
    "    news_id = db.Column(db.Integer,primary_key=True) #主键\n",
    "    news_time = db.Column(db.DateTime,nullable=False)\n",
    "    sec_code = db.Column(db.String(64),nullable=False)\n",
    "    company_name = db.Column(db.String(64),nullable=False)\n",
    "    relevance = db.Column(db.Float,nullable=False)\n",
    "    pos = db.Column(db.Float,nullable=False)\n",
    "    neg = db.Column(db.Float,nullable=False)\n",
    "    neu = db.Column(db.Float,nullable=False)\n",
    "\n",
    "class Event_label(db.Model):\n",
    "    __tablename__ = \"event_label\"\n",
    "    news_id = db.Column(db.Integer,primary_key=True) #主键\n",
    "    news_time = db.Column(db.DateTime,nullable=False)\n",
    "    event_code = db.Column(db.String(64),nullable=False)\n",
    "    event_name = db.Column(db.String(64),nullable=False)\n",
    "\n",
    "class Industry_label(db.Model):\n",
    "    __talbename__ = \"industry_label\"\n",
    "    news_id = db.Column(db.Integer,primary_key=True) #主键\n",
    "    news_time = db.Column(db.DateTime,nullable=False)\n",
    "    industry_code = db.Column(db.String(64),nullable=False)\n",
    "    industry_name = db.Column(db.String(64),nullable=False)\n",
    "    relevance = db.Column(db.Float,nullable=False)\n",
    "\n",
    "\n",
    "class News_info(db.Model):\n",
    "    __talbename__ = \"news_info\"\n",
    "    news_id = db.Column(db.Integer,primary_key=True) #主键\n",
    "    news_time = db.Column(db.DateTime,nullable=False)\n",
    "    news_title = db.Column(db.String(100),nullable=False)\n",
    "    news_url = db.Column(db.String(256),nullable=False)\n",
    "    \n",
    "db.create_all()##########################创建数据库\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
